####################################################################################################
### Title: People Consistently View Elections and Civil Liberties as Key Components of Democracy ###
### Content: Analysis of ranking of attribute features                                           ###
### Date: August 24, 2024                                                                        ###
####################################################################################################

### Set-up ----
## Clean the working environment and set the working directory
rm(list = ls())
setwd("~/Desktop/Science_Replication/all_countries")

## Install the cjRank package (if not yet installed)
# library(devtools)  # version 2.4.3
# install_github(repo = "carl-mc/cjRank")

## Load the required packages
library(cjRank)    # version 1.0
library(tidyverse) # version 2.0.0

## Read the cleaned datasets
df_US <- read.csv("~/Desktop/Science_Replication/data_cleaning/df_US.csv")
df_JP <- read.csv("~/Desktop/Science_Replication/data_cleaning/df_JP.csv")
df_EG <- read.csv("~/Desktop/Science_Replication/data_cleaning/df_EG.csv")
df_IN <- read.csv("~/Desktop/Science_Replication/data_cleaning/df_IN.csv")
df_IT <- read.csv("~/Desktop/Science_Replication/data_cleaning/df_IT.csv")
df_TH <- read.csv("~/Desktop/Science_Replication/data_cleaning/df_TH.csv")

## Merge the datasets
df_cj <- bind_rows(df_US, df_JP, df_EG, df_IN, df_IT, df_TH)

### Define objects with variable names ----
## Categorical attribute variables
attr.vars <- c("election", "civil", "leader", "econ", "gender", 
               "populist", "direct", "expert", "obedient")

## Binary outcome variable
outcome <- "selected"

## Unique respondent ID
respid <- "id"

## Unique task x respondent ID
df_cj$resp_pair <- paste0(df_cj$id, ".", df_cj$task)
taskid <- "resp_pair"

### Setting up a cjRank object ----
df_rank <- cjRank$new(data = df_cj, 
                      outcome.var = outcome,
                      task_id.var = taskid,
                      attr_cat.var = attr.vars,
                      resp_id.var = respid,
                      min_obs = 100,
                      cluster_var = respid)

### Derive order and ranking of attribute features ----
order <- df_rank$get_order()
df_rank$feature_key[order]
df_rank$get_ranks()

### Bootstrap the ordering ----
set.seed(1234567)
sum.boot <- df_rank$get_ranks_bs(iter = 100) # this will take several minutes

### Table S6: ranking of attribute-levels ----
sum.boot <- sum.boot[order(sum.boot$rank, sum.boot$mean),]
sum.boot

### Nested marginal means ----
nested_mm <- df_rank$get_nested_margmean(adj_coocurrence = TRUE)

### F-statistics of differences between nested marginal means ----
ftest.df <- df_rank$get_rank_fstats()

### Within-rank marginal means ----
within_mm <- df_rank$get_within_rank_margmean(adj_coocurrence = TRUE)

### Figure S14: visualize the nested marginal means ----
coef.df <- nested_mm
coef.df <- coef.df %>% mutate(level = case_when(
  feature_key == "Elections are free and fair" | 
    feature_key == "Civil liberties are strongly protected" |
    feature_key == "Leader is highly constrained" |
    feature_key == "Economic equality is high" |
    feature_key == "Gender equality is high" |
    feature_key == "Leader frequently follows the majority" |
    feature_key == "Policies are frequently voted on" |
    feature_key == "Experts have much influence on policy" |
    feature_key == "Dissidents mostly challenge the gov't" ~ 1,
  feature_key == "Elections are biased" |
    feature_key == "Civil liberties are weakly protected" |
    feature_key == "Leader is somewhat constrained" |
    feature_key == "Economic equality is somewhat low" |
    feature_key == "Gender equality is somewhat low" |
    feature_key == "Leader sometimes follows the majority" |
    feature_key == "Policies are sometimes voted on" |
    feature_key == "Experts have some influence on policy" |
    feature_key == "Dissidents occasionally obey the gov't" ~ 2,
  feature_key == "Elections are not held" |
    feature_key == "Civil liberties are not at all protected" |
    feature_key == "Leader is weakly constrained" |
    feature_key == "Economic equality is very low" |
    feature_key == "Gender equality is very low" |
    feature_key == "Leader rarely follows the majority" |
    feature_key == "Policies are rarely voted on" |
    feature_key == "Experts have small influence on policy" |
    feature_key == "Dissidents mostly obey the gov't" ~ 3
))
coef.df$level <- factor(coef.df$level)

coef.df$rank_num <- coef.df$rank

coef.df$rank <- c(rep("Baseline", 27), 
                  rep("Free and fair elections", 26), rep("Strong civil liberties", 25), 
                  rep("High gender equality", 24), rep("Highly constrained leader", 23), 
                  rep("", nrow(coef.df) - 27 - 26 - 25 - 24 - 23))
coef.df$rank <- paste0(coef.df$rank,
                       ifelse(coef.df$rank == "Baseline", 
                              paste0("\n\nN: ", ftest.df$N[coef.df$rank_num + 1], "\n"),
                              paste0(c(NA, 
                                       paste0("\nRank ", sum.boot$rank, " [", 
                                              sum.boot$lower_bound, ";", 
                                              sum.boot$upper_bound, "]")[!duplicated(sum.boot$rank)])[coef.df$rank_num + 1],
                                     "\nN: ", ftest.df$N[coef.df$rank_num + 1], "\n",
                                     "F-stat: ", c(NA, signif(ftest.df$fstat, 2))[coef.df$rank_num + 1])))
coef.df$rank <- factor(coef.df$rank, levels = unique(coef.df$rank), ordered = T)

coef.df$treat_type <- 
  factor(coef.df$attribute,
         levels = c("election", "civil", "leader", "populist", "obedient",
                    "econ", "gender", "expert", "direct"),
         labels = c("Electoral", "Liberal", "Institutional", "Populist", "Loyalist",
                    "Economic", "Gender", "Technocratic", "Direct"))

ggplot(subset(coef.df, rank_num < 5), 
       aes(x = level, y = coef)) +
  geom_hline(yintercept = .5, lty = 2, col = "gray50") +
  geom_point() +
  geom_segment(aes(xend = level, 
                   y = coef - 1.96 * se, 
                   yend = coef + 1.96 * se)) +
  scale_y_continuous(breaks = seq(.4, .6, by = .1)) +
  facet_grid(treat_type ~ rank) +
  labs(x = "Attribute Level", y = "Marginal Means\n") +
  theme_minimal(base_size = 12, base_family = "Times") %+replace%
  theme(panel.spacing = unit(1, "lines"),
        panel.grid.minor.x = element_blank(),
        panel.grid.major.x = element_blank())
ggsave("Figure S14.pdf", width = 9, height = 10)
